In [ ]:
# Install the third-party packages this notebook imports (pandas, numpy, plotly)
# into the environment of the running kernel.
%pip install pandas numpy plotly
# NOTE(review): nbformat is presumably upgraded because plotly's notebook
# renderer depends on it -- confirm, and consider pinning versions for reproducibility.
%pip install --upgrade nbformat
Requirement already satisfied: pandas in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (2.1.4) Requirement already satisfied: numpy in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (1.26.3) Requirement already satisfied: plotly in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (5.18.0) Requirement already satisfied: pytz>=2020.1 in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (from pandas) (2023.3.post1) Requirement already satisfied: tzdata>=2022.1 in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (from pandas) (2023.4) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\looper\appdata\roaming\python\python310\site-packages (from pandas) (2.8.2) Requirement already satisfied: tenacity>=6.2.0 in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (from plotly) (8.2.3) Requirement already satisfied: packaging in c:\users\looper\appdata\roaming\python\python310\site-packages (from plotly) (23.2) Requirement already satisfied: six>=1.5 in c:\users\looper\appdata\roaming\python\python310\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0) Note: you may need to restart the kernel to use updated packages.
[notice] A new release of pip available: 22.3.1 -> 23.3.2 [notice] To update, run: python.exe -m pip install --upgrade pip
Requirement already satisfied: nbformat in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (5.9.2) Requirement already satisfied: jupyter-core in c:\users\looper\appdata\roaming\python\python310\site-packages (from nbformat) (5.7.1) Requirement already satisfied: traitlets>=5.1 in c:\users\looper\appdata\roaming\python\python310\site-packages (from nbformat) (5.14.1) Requirement already satisfied: jsonschema>=2.6 in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (from nbformat) (4.20.0) Requirement already satisfied: fastjsonschema in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (from nbformat) (2.19.1) Requirement already satisfied: rpds-py>=0.7.1 in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (from jsonschema>=2.6->nbformat) (0.16.2) Requirement already satisfied: attrs>=22.2.0 in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (from jsonschema>=2.6->nbformat) (23.2.0) Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (from jsonschema>=2.6->nbformat) (2023.12.1) Requirement already satisfied: referencing>=0.28.4 in c:\users\looper\appdata\local\programs\python\python310\lib\site-packages (from jsonschema>=2.6->nbformat) (0.32.1) Requirement already satisfied: pywin32>=300 in c:\users\looper\appdata\roaming\python\python310\site-packages (from jupyter-core->nbformat) (306) Requirement already satisfied: platformdirs>=2.5 in c:\users\looper\appdata\roaming\python\python310\site-packages (from jupyter-core->nbformat) (4.1.0) Note: you may need to restart the kernel to use updated packages.
[notice] A new release of pip available: 22.3.1 -> 23.3.2 [notice] To update, run: python.exe -m pip install --upgrade pip
In [ ]:
import pandas as pd
import numpy as np
import plotly.express as px
# Load the Spotify 2023 dataset from the working directory and force the
# 'streams' column to a numeric dtype. errors='coerce' turns any value that
# cannot be parsed as a number into NaN instead of raising.
# NOTE(review): some public copies of this CSV are reportedly not UTF-8 encoded;
# if read_csv raises UnicodeDecodeError, an explicit encoding= may be needed -- confirm.
data = pd.read_csv("spotify-2023.csv").assign(
    streams=lambda frame: pd.to_numeric(frame['streams'], errors='coerce')
)
In [ ]:
# Most-streamed track for each release year.
#
# 'streams' was coerced to numeric when the data was loaded, so unparseable
# values are NaN. A release year whose rows are all NaN has no valid maximum:
# idxmax() then yields a missing label (or raises, depending on the pandas
# version) and the .loc lookup below fails. Dropping NaN stream counts first
# means such years are simply left out of the chart.
most_streamed = data.loc[
    data.dropna(subset=['streams'])
    .groupby('released_year')['streams']
    .idxmax()
]
clean_data = most_streamed[['track_name', 'artist(s)_name', 'released_year', 'streams']]

# Show the winning track and artist on hover so each bar can be identified,
# and give the figure a title so it stands on its own.
fig = px.bar(
    clean_data,
    x='released_year',
    y='streams',
    hover_data=['track_name', 'artist(s)_name'],
    title='most streamed song by release year',
)
fig.show(renderer='notebook')
In [ ]:
# Top three tracks by stream count for every release year from 2010 onward.
#
# The ranking is done with sort_values + groupby().head(3) rather than
# groupby().apply(nlargest): apply() only kept 'released_year' in the result
# because the grouping column was passed into the applied function, which
# pandas 2.2 deprecates and later releases stop doing. head() keeps every
# column, so the selection below keeps working across pandas versions.
# Rows are ordered by year (ascending) and then streams (descending); rows with
# a missing stream count sort last within their year.
top_songs = (
    data.query("""released_year >= 2010 """)  # filter first: fewer rows to sort
    .sort_values(['released_year', 'streams'], ascending=[True, False])
    .groupby('released_year')
    .head(3)
    .reset_index(drop=True)
)
top_songs = top_songs[['track_name', 'artist(s)_name', 'released_year', 'streams']]

# One stacked bar per year; each segment is one of that year's top three
# tracks, shaded by its stream count.
fig = px.bar(
    top_songs,
    x='released_year',
    y='streams',
    hover_data=['track_name', 'artist(s)_name'],
    title='top 3 songs streamed by year',
    color_continuous_scale="Blues",
    color='streams',
)
fig.show(renderer='notebook')
In [ ]:
# Share of total streams attributed to each musical key.
song_keys = data[["released_year", "key", "streams"]]

# Total streams per key, returned as a flat two-column frame ('key', 'streams').
group = song_keys.groupby('key', as_index=False)['streams'].sum()


def to_millions(x):
    """Return ``x`` divided by one million (works on scalars and on Series)."""
    return x / 1e6


# Scaling to millions does not change the slice proportions; it only makes the
# values displayed on the chart easier to read.
group["value_in_millions"] = to_millions(group["streams"])

fig = px.pie(
    group,
    names="key",
    values="value_in_millions",
    title="key share in songs",
)
fig.show(renderer='notebook')